* ------------------------------------------------------------------------------
* Session setup: reset Stata, point at the project folder, declare path globals,
* and open the log for this do-file.
* ------------------------------------------------------------------------------
clear all
capture log close
set more off
set varabbrev off

* Project root; every path global below is relative to this folder
cd "C:\Users\ASUS\Dropbox\ADB Bhutan TVET\Mentoring RCT2021fall\analysis"

* --- code ---------------------------------------------------------------------
* do files
global dofiles "./code"

* --- data ---------------------------------------------------------------------
* top-level data folder
global data "./data"
* original baseline survey datasets
global raw "./data/raw"
* temporary (intermediate) datasets
global temp "./data/temp"
* cleaned datasets used in the analysis
global clean "./data/clean"

* --- output -------------------------------------------------------------------
* randomization results
global randomization "./output/randomization"
* endline tracking results
global endline "./output/endline_tracking"
* mentor report results
global mentor_report "./output/mentor_report"
* tables and figures
global tabfig "./output/tables_figures"
* log files
global logfile "./output/log_files"

* Today's date with the blanks stripped out of $S_DATE, stored in $date
local tcdate = subinstr("$S_DATE", " ", "", .)
global date "`tcdate'"

* Open (and overwrite) the log for this do-file
log using "$logfile/merge_grade10students_endline.log", replace

// -----------------------------------------------------------------------------
// Import the Class XI stream-choice roster (Excel) and harmonise its identifier
// variables (student id, citizen id, name, school name) so that it can be
// merged with the analysis data. The cleaned roster is written to
// "$raw/student/grade10_streamchoice.dta".
// -----------------------------------------------------------------------------

// Read the workbook through $raw (same file as the former absolute path, given
// the cd at the top of this do-file). The raw import used to be saved and
// immediately reloaded; that round trip is dropped because the same .dta is
// overwritten with the cleaned data at the end of this section.
import excel "$raw/student/Class XI Students.xlsx", sheet("Sheet1") firstrow clear

ren Stream streamchoice
ren student_code studentid_b

// Citizen id: keep only 11-character entries, blank out the placeholder text,
// then convert to numeric.
ren CidNo citizenid
replace citizenid = "" if length(citizenid)~=11
replace citizenid = "" if citizenid == "UNDER PROCE"
destring citizenid, replace

// Student name: lower-case, then strip blanks and punctuation so that names
// compare equal regardless of spacing or punctuation.
replace name = trim(itrim(lower(name)))
replace name = subinstr(name," ","",.)   //Removes blanks
replace name = subinstr(name,",","",.)   //Removes comma (,)
replace name = subinstr(name,"'","",.)   //Removes apostrophe (')
replace name = subinstr(name,".","",.)   //Removes dot (.)
replace name = subinstr(name,"/","",.)   //Removes slash (/)
replace name = subinstr(name,"-","",.)   //Removes dash (-)
replace name = subinstr(name,"=","",.)   //Removes equals sign (=)
replace name = subinstr(name,"(","",.)   //Removes opening parentheses
replace name = subinstr(name,")","",.)   //Removes closing parentheses

// School name: lower-case and trim, then expand the short names used in the
// roster to the full school names.
// NOTE(review): the full names are presumably the spellings used in the master
// file - confirm against it.
ren OrgName schoolname_b
replace schoolname_b = trim(itrim(lower(schoolname_b)))
replace schoolname_b = "yangchen gatshel middle secondary school" if schoolname_b == "yangchen gatshel"
replace schoolname_b = "bajo higher secondary school" if schoolname_b == "bajo"
replace schoolname_b = "dechencholing higher secondary school" if schoolname_b == "dechhenchoeling"
replace schoolname_b = "pelkhil higher secondary school" if schoolname_b == "pelkhil"
replace schoolname_b = "motithang higher secondary school" if schoolname_b == "motithang"
replace schoolname_b = "yangchenphug higher secondary school" if schoolname_b == "yangchenphug"
replace schoolname_b = "samdrupjongkhar middle secondary school" if schoolname_b == "samdrupjongkhar"
replace schoolname_b = "tashitse higher secondary school" if schoolname_b == "trashitse"
replace schoolname_b = "babesa higher secondary school" if schoolname_b == "babesa"
replace schoolname_b = "dashiding higher secondary school" if schoolname_b == "dashiding"
replace schoolname_b = "druk school" if schoolname_b == "druk"
replace schoolname_b = "gedu higher secondary school" if schoolname_b == "gedu"
replace schoolname_b = "karmaling higher secondary school" if schoolname_b == "karmaling"
replace schoolname_b = "lamgong middle secondary school" if schoolname_b == "lamgong"
// The condition previously tested " lhuentse" with a leading blank, which can
// never match because schoolname_b has already been trimmed above.
replace schoolname_b = "lhuentse higher secondary school" if schoolname_b == "lhuentse"
replace schoolname_b = "samtse higher secondary school" if schoolname_b == "samtse"
replace schoolname_b = "tashitse higher secondary school" if schoolname_b == "tashitse"

* school names in the streamchoice.dta that do not have any matches in master file
* replace schoolname_b = "" if schoolname_b == "desi"
* replace schoolname_b = "" if schoolname_b == "gelephu"
* replace schoolname_b = "" if schoolname_b == "gomtu"
* replace schoolname_b = "" if schoolname_b == "jakar"
* replace schoolname_b = "" if schoolname_b == "karma academy"
* replace schoolname_b = "" if schoolname_b == "peljorling"
* replace schoolname_b = "" if schoolname_b == "pelrithang"
* replace schoolname_b = "" if schoolname_b == "phuentsholing"
* replace schoolname_b = "" if schoolname_b == "shaba"
* replace schoolname_b = "" if schoolname_b == "shari"
* replace schoolname_b = "" if schoolname_b == "wangsel"

save "$raw/student/grade10_streamchoice.dta", replace


* ------------------------------------------------------------------------------
* Merge the cleaned stream-choice roster onto the analysis data and save the
* result to "$temp/grade10_temp.dta".
* ------------------------------------------------------------------------------

* Master dataset. "$clean/grade10_analysis.dta" used to be loaded first and was
* immediately replaced by this file, so only this load had any effect.
* NOTE(review): confirm stem_analysis.dta is the intended master.
use "$temp/stem_analysis.dta", clear

* Merge on the student code. -force- tolerates string/numeric type mismatches
* in variables present in both files (master values are kept).
merge 1:1 studentid_b using "$raw/student/grade10_streamchoice.dta", force  //4347 matched

* Alternative key combinations that were tried. They are kept for reference
* only: a second -merge- cannot run while _merge from the first one exists, and
* the keys marked "not unique" abort a 1:1 merge. To try one, comment out the
* merge above and uncomment the lines needed.
* merge 1:1 studentid_b schoolname_b using "$raw/student/grade10_streamchoice.dta"  //1574 matched
* destring citizenid, replace   // needed before using citizenid as a merge key
* merge 1:1 citizenid studentid_b using "$raw/student/grade10_streamchoice.dta"  //3364 matched
* merge 1:1 citizenid using "$raw/student/grade10_streamchoice.dta"  //not unique
* merge 1:1 schoolname_b citizenid name using "$raw/student/grade10_streamchoice.dta"  //not unique

* Drop roster records that have no counterpart in the master data
drop if _merge == 2

* -replace- so the do-file can be re-run without failing on the existing file
save "$temp/grade10_temp.dta", replace
log close